{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 6 - Supervised Clustering R-squared"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "import numpy as np\n",
    "import shap\n",
    "import matplotlib.pyplot as pl\n",
    "import scipy.cluster\n",
    "import pickle\n",
    "import random\n",
    "\n",
    "import xgboost\n",
    "import sklearn.datasets\n",
    "import shap\n",
    "\n",
    "def plot_m(m, y, name=\"\", color=\"\"):\n",
    "    m = np.nan_to_num(m)\n",
    "    D = np.vstack([np.sum((m - m[i,:])**2, 1) for i in range(m.shape[0])])\n",
    "    clust = scipy.cluster.hierarchy.complete(D)\n",
    "\n",
    "    group_vals = [[y[i]] for i in range(m.shape[0])]\n",
    "    for i in range(len(clust)):\n",
    "        group_vals.append([])\n",
    "        #print(clust[i,0], clust[i,1])\n",
    "        group_vals[-1].extend(group_vals[int(clust[i,0])])\n",
    "        group_vals[-1].extend(group_vals[int(clust[i,1])])\n",
    "\n",
    "    count = m.shape[0]\n",
    "    counts = [count]\n",
    "    var = 1.0\n",
    "    variances = [var]\n",
    "    total_var = np.var(y)\n",
    "    for i in range(m.shape[0], len(group_vals)):\n",
    "        #print(np.var(group_vals[i]))\n",
    "        count = count - 1\n",
    "        counts.append(count)\n",
    "        clust_ind = i-m.shape[0]\n",
    "        ind1 = int(clust[clust_ind,0])\n",
    "        ind2 = int(clust[clust_ind,1])\n",
    "        var = var - np.var(group_vals[ind1])*len(group_vals[ind1])\n",
    "        var = var - np.var(group_vals[ind2])*len(group_vals[ind2])\n",
    "        var = var + np.var(group_vals[i])*(len(group_vals[ind1])+len(group_vals[ind2]))\n",
    "        variances.append(1-(var/total_var)/m.shape[0])\n",
    "    #print(variances)\n",
    "    #print(np.mean(variances), m.shape[0])\n",
    "\n",
    "    return pl.plot([x for x in counts], np.array(variances), color=color, linewidth=2, label=name+\" (AUC = \"+str(round(np.mean(variances),2))+\")\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/slund1/anaconda3/lib/python3.5/site-packages/scipy/cluster/hierarchy.py:298: ClusterWarning: scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix\n",
      "  return linkage(y, method='complete', metric='euclidean')\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAARYAAADUCAYAAABZGgxEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXl4lNXVwH8nk8kGYQ/Ivu+LYVHUqgQVCyiuLbi0tS6l\ndbd1o9pqW6uirRatinvFLein4oKI1QKCCkIiOyIgayACCYHsy2TO98d9MyQhmcwkM2QC9/c8r/O+\n973LmXnM4dxzzz1XVBWLxWIJJVGNLYDFYjn2sIrFYrGEHKtYLBZLyLGKxWKxhByrWCwWS8ixisVi\nsYScsCkWEXlZRPaJyLpa3ouIPCkiW0RkjYiMCJcsFovl6BJOi+UVYLyf9xOAvs41FZgZRlksFstR\nJGyKRVUXAwf8VLkQeFUNy4BWItIxXPJYLJajR2P6WDoDuyo9ZzhlFouliRPd2AIEgohMxUyXaCux\nI3u4mjWyRFUpF6HEFU15VBReEcpFKJcoVMArgiJ4BUpc0RS6YyhxuUDEb5/uKIiOEgRwu4QogYoW\n0VEQJeJ7Ft9/qFIW4xJiXEJ0lHkWAUGcz8NlFksgpKenZ6lqUiB1G1Ox7Aa6Vnru4pQdgao+DzwP\nMPzEEfrB7HlQrqh60XIveBX1eqFcodSD5OQhpR7zrtwL3opPrVJWVualrMQDpR60zIOUlVNe6qGk\npBy8XtTrJbqolOb7sonPLSA2vxBUEa+aT1Wa5eYTU1Ia+Lf2mssTFUWJ283BhARy4+Ipjo6mJDqa\n7IRmZLjj2dSqLXsTmnMwLp49zRM5EJdAbkwseTGxeFyuev/oNWGUF0SL4HZBTJTQOi6K1nFRNHNH\nER0FLjGKruZPo/yGtYuhWwsXHZu56NfGTXO3IFZzHTOIyI5A6zamYvkQuElEZgOjgUOqmllXI5c7\nis4DTwi7cAFT7oWcfNixD/KLoagE8gqhoARKyqDMKC4KSmDTbli9FbJyiS73El1SQrOSEjqTE9SQ\nZTFuSuNjKYmP812H2raiOC4WT7SbHX17sDepLWskgbXuRHLKoMyreLzms8p9OShQWg6lKHgAlP1F\n3gb/NC6BBLextlwidGpulE6sY0nFuiDWJcRGCzFR4txDc3cUHRJcuF3gjhJz+e7NZ4xLGNjWTctY\nGzERiYRNsYhIKpACtBORDOB+wA2gqs8C84CJwBagELg6XLKEFVcUtGthrmAo80BRKWTlQkExFJcZ\npZSVC9m5RgkVlEBOHvx4EHILIK8I8otxl5bhLi2j2aH8Grv+ySfV5GvVDM46ESaMgh4doF0ixMZA\nrBviYyivpnSKPcrBEuVAcTlFHqXcCx6vUq7g8UK56hGf2UVe1uwvI6fYy6acMnbnm7Z5pRW755UD\nxV7WZZXV62euiegoaB0bRXy0EO8W4qOFXi3dtIuPIsZllFCMyyitGNfhqWHr2ChaxkbhjoLmMVH0\nax1Ny9goEtxWSYUKaWppE0aNGqVpaWmNLUbjoWoUUn4x5BcZZZNXCDv2g8cDmTmwYSdkHjAKad9B\n//11bw/JvSAxHgZ1g5YJ0KE1tGkOvTtCXEy9RS0tV4o8itdRQNsOecgp9lLqVUo8Skm5UloOJeUV\n9+bzYImyv7DcUXRGqZX5LCxzn19mFJk3hP/7nnRCDJ2bu4iLNsqnVVxUFSupZWwUfVpH07+1mwS3\n0DY+tFPSSEdE0lV1VCB1m4Tz1lIJEUiINVf7lofLT6+lfkkZ7NwP81bAkvWwNwcO5JvpWXGpmcLt\n2Fdz25YJMLQndGkLZw6Bvp2g1wnQPD4gUSsshAqSEkL7h1hY5iWv1CivIo+SW+rlu+wyn5IqLYdS\nb8W9UUgl5UpWUTn5pcY6+7HAy558D9nFXlb8WMqKIMYf0SGGgW3ctImLolsLF10To7mgdzzx1vKx\nFstxTbkXvv0Bdu6DXVnGyjlY4Fg7ObUrnH6d4NyRkNTCTK0GdTNTwYTYoyt/CMkr9bJ0TwkFZUZJ\nZRWVk1dqLCSPGl/U3sJyVu8v5ccCLz8WlNfYT4wLBrV1M7pjLBf2TuDUTrHEujgmlE0wFotVLJaa\nUTVTqv2HYPkm2JgBP2Qa309N8w9XFAzsCif3N9Or6ChT5nJBpzZw9onH1Np2Zr6HZZml5JV62VdY\nzu78chbtKmbVvpp9SG3jo+iWGM2l/RIY1SGGXq2i6ZoYTVx00/lNrGKxhI+CYnjva6Nw9h2EDbtg\n+17YW4cv56S+8MuzjNJJiDWfLSMrHqmhqCr7C718ubuEuVsLmftDESWOn6mslkW2rokuhraLoV18\nFP3auEmMETo3d9EhwUWHZi76tIqOmCV7q1gsR58DefB9BnyzyaxqeZz4oeJS+O+3xq9TmWgX9D4B\nhvSADq2M87hDazihFZzUD1okNMrXCAflXrN8vzyzhHnbitiYXcaOXA8Z+eV46ljVbxcfRc+W0ZzT\nPY6keBeJMcK47vF0b3n03aNWsVgii4wseH6+8d3syoLCEthcy5QKIM4No/oZxdO+pZleJcbDgK4Q\nX/9Vqkij3Kus2lfKnoJyduaWk5HnIbfUS0ZeOfuLvGzILuVQSc2/0Tnd4/jnmNYMbucmOuroWDRW\nsVgin0InYHDNdhOjc7DATK1WbYXvawzAhvhYGDME2iYai+bk/saR3CbRxOpENX0HaWVKy5UduR4W\n7ixmR66HgjJlbVYZC3YW++q4o+C0TrHM/1mHsPtrrGKxNF1UzfL4is1merX8e+PPyc6DzXtqb9e6\nOYzuD13awYAuRvk0i4cRvSDx2JlWAWTkefjTlwf5Ylcx23PN6lSvltHccVILpvRPoE2Y4mtCqlhE\npD3wE6ATUASsA9JUteEx3/XAKpbjmLTNRunkF8PWTKN8snON4skvrrmNiJlOdWhllsYvOQ3OToaY\nYyOE69X1+Vz1SbbvOUpgQs94hrRzkxgTRUK0cQb/vH9Cg53AIVEsIjIWmAa0AVYC+4A4oB/QG3gH\neExVcxskbZBYxWI5AlWzOrU1E7btNU7kghLYsqdmK6dNczj/ZBgz1EQdt0xo0lbNpgNlzNlcyDub\nCknbW/OG2PcuTOLivg37jqFSLP8A/q2qO2t4Fw2cD7hU9d2GCBssVrFYgiIn3+zB2pgB89Ph4xVm\nP1Z1BnY1S+DnjoArU466mKHi270lfL27hNxSJa/Uy+r9pXyyzVhzNw9P5ImzWtfbcrE+FoulNso8\n8PV38Om3xqLZsMtMpSrTsbXZtDmoG7RtAbHR0DUJhvZoFJEbwoGics75v32s3GcsmXbxUVw2oBmP\np7TG7QpOwYTKYvmDv4aq+nhQUoUIq1gsIScrF9K3wCdp8O5XZh9VTYwdBrP+YHaFNyFUldSNhVwz\nP4sSZyfCw2e0Ytrolv4bViNUiuV+57Y/cBImfwrAJGC5qv4iKKlChFUslrDiKYcv18O6HWY5vNjZ\nxLnyB/M+Sowlc2JPGDfcbM5sFte4MgdIfqmX51bnc8cXOUzsGc/Hl7YPqn2oV4UWA+epap7znAh8\nrKpnBiVViLCKxdIozP4Cbn/JKJ7KRLsOrzhddbZxBsdX23keQazdX8qwWZl0S3Sx47ddgmob6rQJ\nHYDKruZSp8xiOX64bAxceKrxx+zNgf+uhMXrTEDf7mxzfbXhcP3rfgp/vdIongiidyvzJ78zr5wz\nUn/krUnt6NQ89EvvgVgs9wKTgTlO0UXA26r6UMilCQBrsVgiioJiM1V6+b9mF/iOfSYRFxjH7z2T\n4eenR5Rf5qp5Wby6oQCAW0ckMuOsNgG1C/mqkHNK4RnO42JVXRmQJGHAKhZLRKMKf58Nry2AQ4Wm\nLNplsvH1aA9XjzNO4Ebmv9uL+Ok7+2gXH0Xm9V0C2m8UjGIJdHNFApCrqk8AGSLSM8B2FsvxhQj8\n+XLY+Bz881qzl6nca4L2Pv0WrvyHmUo1MuO6x9G3dTRZRSZzXqipU7E4q0N3A390itzA6yGXxGI5\nloiKMvlnlvwDfngRPn3ABOGVe2HYTfDIOyamppEQEU7rZDL+vbupMOT9B2KxXAxcABQAqOoeIDHk\nklgsxyrN4sxq0bM3QX/nsM/H50D/38Kj7zSaWCM6mBQUj6Xlkl8a2q1/gSiWUjWOGAUQkWMr7ZfF\ncrQY0AW+eAReutXsvi4oNnlqGin6/cqBh/+UQz0dCkSxvC0iz2EObf8N8DnwQkilsFiOF0TMBsg1\nT5vnvCKzn6kRaBvv4qbhZvJx64IDrMgsoTxE56nUqVhU9Z+YnczvYqJw71PVf4dkdIvleCXaBUO6\nm/tXPm80MX41yFgta7PKOPmNH7nzi9A4lgNaFVLVz1T1TlW9Q1U/C7RzERkvIt+LyBYRmVbD+5Yi\n8pGIrBaR9SLSNE9DtFjqw5Ae5vORd4zPpRE4qWMs71+UxPm9zFlRL63Np8TTcKslkFWhS0Rks4gc\nEpFcEckTkTpzsIiIC3gamAAMAi4XkUHVqt0IbFDVEzHHsT4mIsdOUlOLxR8P/cpE9IJRLgtWN4oY\nF/ZJ4KNL2jO0nZvcUmVZZkmD+wzEYnkUuEBVW6pqC1VNVNVADio+GdiiqltVtRSYDVxYrY4CiWIS\nRDQHDuAcS26xHPM0i4MnpsIfJ5vnm56tOVfMUeKsbmYzZeWcuvUlEMWyV1W/q0ffnYFdlZ4znLLK\nPAUMBPYAa4Fba0p5KSJTRSRNRNL2799fD1Eslgjm5knm3KXsXPi80YLaGesolie+zeWpb3PZV8tp\nj4EQiGJJE5G3RORyZ1p0iYhcUu8Rq/JTYBUmn24y8JSIHGENqerzqjpKVUclJSWFaGiLJUJwRUHK\nUHO/ObPRxBjTJY6WscKhEuXmBTncvbj+jtxAFEsLoBA4F5OLZRImLWVd7Aa6Vnru4pRV5mrgPTVs\nAbYBAwLo22I5tujdyXw+9VGjrRK1ioti0ZQTSOlqInLXZ9d8XGwg1LlfWlXru1KzAujr7CvaDVwG\nXFGtzk7gbGCJiHTALGdvred4FkvT5ZT+5mykgwVw939g7Xb42y+OehKp5PYxpJ6fRMeZGaz4sRRV\nrVeO3FotFhG5y/n8t4g8Wf2qq2NV9QA3AZ8C32FSLawXkd+JyO+cag8Ap4nIWuB/wN2qmhX0t7BY\nmjod28Dqp0yyKIDXF8I1MxrFmdshwRwbAjDloywW7CwKug9/qSknqepHInJVTe9VdVbQo4UAmzbB\nckxT6jEpF+5/Hcoc52nqXSa591Hkvi8P8sCyw0nG117VkaHtY22WfoulSbNqK/z6cch0HKhrnoIO\nrY+qCBU5WwDmX9qe8b0SQpePRUSSROSfIjJPRBZUXA2U2WKx+CO5F6Q/aXLpAiTfDM98bDLUHSXO\n7RHPBb1NRG5hkNG4gawKvYHxkfQE/gpsxzhmLRZLOHFFmQC6/p3Bq/DXN2HSX49qHpcEt/G1FIVB\nsbRV1ZeAMlX9QlWvAc4KWkKLxRI8pwyAT/4Gd11qTgPYexBe/sykXDgKxEdXKJbg8rUEolgqFrMz\nReQ8ERmOOc/ZYrEcDZrFwe2XwO8mmuf7XochN8DTc6Gw4ft6/JEQbVREYVnoLZa/i0hL4HbgDuBF\n4PdBymexWBrKVWfDT0fACa2NQvlbKlz9r7AOedhiCU6xBBIgN9e5PQSMDVYwi8USIprFwau3OycB\nvGWidL/53uTRdQWaFz84KhRLsBZLrYpFRP6Nk46yJlT1lqBGslgsoUEE/nwZvP81ZGTD6N/DB/dB\n57YhHyoczts0IN3PZbFYGpNzR5jPXVnGegkDIZ8KVY+sdXYda8UZzhaLpZF56CqThe4PL8Ca7WEZ\nwjcVCvVys4iMcvbyrAHWOWkkR9ZHSIvFEkJE4Bwn1D9ts4nWDTEVU6GX1gaX8DsQj8/LwA2q2kNV\nu2PSSf4nWAEtFksYaN8KRvYx98/OC3n3Xep5YHwgiqVcVZdUPKjql9j0kRZLZCACM2+EKIE5S2HK\ndFi2MWTdj+kaS7dEV9DtAlEsX4jIcyKSIiJjROQZYJGIjHAOi7dYLI1J9/Zwh5PUcdFamDwdvtvl\nv02AiAjXDG0edLtAFMuJQD/gfuAvmBy1w4HHgH8GPaLFYgk9t19i8rlMGAUlZfDSf0PWdYUDNxgC\nCZCzQXEWS1PghNZw5yXwSRqkfgHXnmsOom8gwauVwFaFXnNC+iueu4vI/+oxlsViCTeDu8NlZ4Kn\nHF78NCRd1iMzZUBToS+Bb0RkonN282fAjOCHslgsR4WbJ5nPd76E7Xsb3J3Uw2YJ5Ozm54DrgA+A\nvwFnqmp4wvwsFkvD6dMJzkmG4jJ47pMGdxcWi0VEfomJZfkV8AowT0SObgJOi8USHBVJuX9o+DlF\n9fGxBBL9cilwuqruA1JFZA4wC3PAmMViiUR6nmA+v8tocFdhsVhU9SJHqVQ8L8ecy2yxWCKVru1M\n0Ny+gzD9/2BPtkm3UA/CtSrUT0T+JyLrnOdhwF31GMtisRwt4mLgN+PN/b/eh+G3wFn31CtwLlyr\nQi8Af8RJUamqazCnGgYgkIwXke9FZIuITKulToqIrBKR9SLyRaCCWyyWOvjzZXDpT2BwN6MdNuyE\nCffDzv1BdRMuH0uCqi6vdsxinXuFRMQFPA2MAzKAFSLyoapuqFSnFfAMMF5Vd4pI+6Ckt1gsteOO\nhmduMPeFJXDZIybj3FcboNuYgLsJy1QIyBKR3jjZ5ETkZ0AgruaTgS2qulVVS4HZwIXV6lyBORR+\nJ0BlX47FYgkhCbEwur+5zzwQVNP6nN0ciMVyI/A8MEBEdgPbgCsDaNcZqDyhywBGV6vTD3CLyCIg\nEXhCVV8NoG+LxRIsnZzDNXZnB9UsLFMhVd0KnCMizYCoEGeQiwZGAmcD8cBSEVmmqpsqVxKRqcBU\ngG7duoVweIvlOKKTkxM3WMUSJuctAKpaEKRS2Q1U3gHVxSmrTAbwqdN3FrAYs5u6+tjPq+ooVR2V\nlJQUhAgWi8VHn47m8/vgYlvC5WOpLyuAviLSU0RiMCtJH1ar8wFwuohEi0gCZqr0XRhlsliOX3p2\nMEeI7DkAv3kyYF9LWC2WYFFVD3AT8ClGWbytqutF5Hci8junznfAfEw+3eXAi6q6LlwyWSzHNVFR\nMMmJbf3wG5gVWJKCsPhYHEvidqCbqv5GRPoC/SsdZFYrqjoPmFet7Nlqz/8A/hGU1BaLpX48dp35\nnL0YFq6Bu39Wp0kSLovlP0AJcKrzvBv4e/BDWSyWRifaBb+dYO5XbYU7X66zSbh8LL1V9VEOR94W\n1nMsi8USCfTvAqc4MS3z0urcQxQui6VUROI5HCDXG2PBWCyWpogrCt7/M7RNhOxck8bSD2FJ9IRJ\noj0f6CoibwD/w25CtFiaNiIwvLe5X7Daf9V6dB9IgNxnIvItcIozxq1OzInFYmnKXD8RPl8FWbl+\nq4Urg9zFgEdVP3ZWgjwiclHwQ1ksloiinZMjf/8hv9XC5by9X1V9I6vqQcz0yGKxNGWSAlQsYXLe\n1lSnfge6WiyWyKF1M+PIPVQIpbVnQgmXxZImIo+LSG/nehxIr8dYFoslkoiKgrYtzH1W7VZLuCyW\nm4FS4C3nKsGkUrBYLE2dJEex7K/dgRuuVaECoMa0khaLpYlT4Wfxa7GEIdGTiPQD7gB6VK6vqmcF\nPZrFYoks2lVYLH4USz26DcQJ+3/As8CLQHk9xrBYLJFKhcWyL7Q+lkAUi0dVZwbftcViiXh6djCf\nm/fUWiVcq0IficgNItJRRNpUXPUYy2KxRBqDupvPeStqVS7hUixXAXcCX2OWmdOBtHqMZbFYIo3B\n3aBja8gvhjPvgowjd+uE64jVnjVcvYIfymKxRBwJsfCRE0jvVXMUawgIKIJWRIYAg4C4ijJ7TIfF\ncozQNQlOHQBLN0LZkesz4UpNeT+QglEs84AJwJeAVSwWy7GC21EFZUeG9ocr8vZnmHN/flTVqzHH\nc7QMfiiLxRKxuF3ms0aLJTyJnopU1YtJl9AC2EfV84IsFktTJ8QWSyA+ljTn8PYXMCtC+cDS4Iey\nWCwRi1+LJXgC2SvkHFfPsyIyH2ihqmvqMZbFYolUfBZLDYollBaLiAxQ1Y0iMqKGdyNU9dvgh7NY\nLBGJz2KpYSpUj+78WSx/wBzE/lgN7xSocxOiiIwHngBcmFMOp9dS7yTM9OoyVX2nrn4tFkuIifYz\nFQqlxaKqU0UkCviTqn4VbMci4gKeBsZhDn9fISIfquqGGuo9Avw32DEsFkuIiPHjvK1Hd35XhZzV\noKfq0S/AycAWVd2qqqXAbODCGurdDLyLWW2yWCyNQbQfH0s9ugtkufl/InKpBJ/tpTOwq9JzhlPm\nQ0Q6AxcDdve0xdKYxDhTIU9Ny83hiWP5LSYnS4mI5IpInoj4P4gkcGYAdzuWUa2IyFQRSRORtP37\n94doaIvF4qPCYqkhqXa4lpsT69EvmMPjKwfSdXHKKjMKmO1oxHbARBHxqOr71WR4HngeYNSoUf4P\nmrVYLMFTsSrkCbPztmrH0hroS9VNiIvraLYC6CsiPTEK5TLgisoVVLVnpTFeAeZWVyoWi+Uo4D7K\nFouIXAfcirE4VmGOWl1KHcvNquoRkZuATzHLzS+r6noR+Z3z/tl6yGuxWMJBI1gstwInActUdayI\nDAAeCqRzVZ2H2RFduaxGhaKqvw6kT4vFEgZCbLEE4rwtVtViABGJVdWNQP96jGWxWCKVRrBYMpxN\niO8Dn4lIDrAj+KEsFkvE4m+vUD26CyQ15cWqelBV/wL8GXgJuKgeY1kslkilwmJ5cxF4q0Z/hCXR\nk4g8KSKnAajqF6r6oRNJa7FYjhWGdD98n1E17224Ej2lA38SkR9E5J8iMiroUSwWS2QzuDsM6GLu\nfzxQ5VW4pkKzVHUiZmXoe+AREdlcj7EsFksk07uj+dxTTbGEKedtBX2AAUB3YGPwQ1ksloimo3MO\n4c6q22bCYrGIyKOOhfI3YC0wSlUn1WMsi8USyQx3jgt77hPYsNNXHC6L5QfgVFUdr6qvqOrB4Iex\nWCwRz8Wnwai+kJULE+6DjRlA+Hwsz6nqkecuWiyWYwtXFLzyexjeG4rL4LUFQPh9LBaL5VgnqSXc\nO8Xcr9oKhC+k32KxHE/0d5adv88A1XolegoobQKAiMQBvwDigTdVNTSnR1sslsgiqQW0bg45+fBj\nDkKzoLsIxmJ5AigFcjD7hiwWy7GICPR3sshu2h1aH4uIpIpI70pFbTApKt8FWgc/lMViaTL0cxTL\nb5/CnZMXdHN/Fsu9wAMi8pizu/mfwBzgE+AvQY9ksViaDmOHmc+cfFotXh10c3/nCm0FrhCR04G3\ngI+B81T1yH3VFovl2GLiSXBOMny+iqgazhqqC39TodYiciMwCPg5xrfyqYjYqFuL5XigazsAompI\n/lQX/qZC7wMHMcepvqaqrwGTgOEi8lHwUlosliaFcySIlAevWPwtN7cF3sEsL/8WQFWLgL+JSMeg\nR7JYLE2LaGN3RHm8QQSmOE39vLsfmA+UA9Mqv1DVzOCGsVgsTQ7noHjxhtBiUdV3MUvLFovleMRR\nLFFl5RATXFN/ztsXRGRILe+aicg1InJlcMNZLJYmg7vCYvF7AnKN+JsKPQ3cJyJDgXXAfsxJiH2B\nFsDLwBtBj2ixWJoGLkex1GO52d9UaBUwWUSaY85Y7ggUAd+p6veBdC4i4zFbAVzAi6o6vdr7K4G7\nMRso84DrVTX4aByLxRJ6KiyW8tBaLACoaj6wKNiORcSFsXrGARnAChH5UFU3VKq2DRijqjkiMgFz\n8PvoYMeyWCxhwLFYQh3H0lBOBrao6lbnuJDZwIWVK6jq16qa4zwuw5wPbbFYIgGfxRJZiqUzsKvS\nc4ZTVhvXYvYhHYGITBWRNBFJ279/f01VGp0HH3yQwYMHM2zYMJKTk/nmm28ASElJIS0tzVdv+/bt\nDBlS1Sd+22230blzZ7yVnGSvvPIKSUlJJCcnM2jQIF544YUax125ciXXXnttlbKLLrqIU045pUrZ\nr3/9a955550qZc2bN/fdb9q0iYkTJ9K3b19GjBjB5MmT2bt3bxC/wJEcOHCAcePG0bdvX8aNG0dO\nTk6N9Z544gmGDBnC4MGDmTFjRpV3//73vxkwYACDBw/mrrvuAmDt2rX8+te/bpBslgCoUCye4KdC\nEZHoSUTGYhTL3TW9V9XnVXWUqo5KSko6usIFwNKlS5k7dy7ffvsta9as4fPPP6dr164BtfV6vcyZ\nM4euXbvyxRdfVHk3ZcoUVq1axaJFi7jnnntq/EN/6KGHuOWWW3zPBw8eJD09nUOHDrF169aAZCgu\nLua8887j+uuvZ/PmzXz77bfccMMNNFSJT58+nbPPPpvNmzdz9tlnM3369CPqrFu3jhdeeIHly5ez\nevVq5s6dy5YtWwBYuHAhH3zwAatXr2b9+vXccccdAAwdOpSMjAx27tx5RH+WEOKqv8VSq4/F8ZFc\nh5mezFfVryq9+5Oq/r2OvncDlf+6ujhl1ccZBrwITAhF8ij5Z3iOldY7utf6LjMzk3bt2hEbGwtA\nu3btAu530aJFDB48mClTppCamsrYsWOPqNO+fXt69+7Njh076NChg688Ly+PNWvWcOKJJ/rK3nvv\nPSZNmkSHDh2YPXs299xzT50yvPnmm5x66qlMmnR4G1hKSkrA36E2PvjgAxYtWgTAVVddRUpKCo88\n8kiVOt999x2jR48mISEBgDFjxvDee+9x1113MXPmTKZNm+b7Xdu3b+9rN2nSJGbPnu2zYixhwGex\nhHYq9BwwBsgGnhSRxyu9uySAvlcAfUWkp4jEAJcBH1auICLdgPeAX6rqpqAkjyDOPfdcdu3aRb9+\n/bjhhhuOsDyuvPJKkpOTSU5OZuLEiVXepaamcvnll3PxxRfz8ccfU1ZWdkT/W7duZevWrfTp06dK\neVpa2hHTqor+Lr/8clJTUwOSf926dYwcObLOenl5eb7vUf3asGHDEfX37t1Lx45m98cJJ5xQo8U1\nZMgQlixZQnZ2NoWFhcybN49du8wMetOmTSxZsoTRo0czZswYVqxY4Ws3atQolixZEtD3s9QTl1EP\noV4VOlmG3ElYAAARgklEQVRVhwGIyFPAMyLyHnA5AeTXVVWPiNwEfIpZbn5ZVdeLyO+c988C92H2\nJD3j5NX0qGqDjnD1Z1mEi+bNm5Oens6SJUtYuHAhU6ZMYfr06T4/wBtvvMGoUeZrbd++nfPPPx+A\n0tJS5s2bx+OPP05iYiKjR4/m008/9b1/6623+PLLL4mNjeW5556jTZs2VcbNzMyk8tRw7969bN68\nmdNPPx0Rwe12s27dOoYMGVJj3tJgc5kmJiayatWqoNpUHqum8QYOHMjdd9/NueeeS7NmzUhOTsbl\nmOAej4cDBw6wbNkyVqxYweTJk9m6dSsiQvv27dmzZ0+9ZLEEiNvZhOgJYRwLlYJ4VdUDTBWR+4AF\nQPNaW1VCVecB86qVPVvp/jrMdKvJ43K5SElJISUlhaFDhzJr1qw6HYyffvopBw8eZOjQoQAUFhYS\nHx/vUyxTpkzhqaeeqrV9fHw8xcXFvue3336bnJwcevbsCUBubi6pqak8+OCDtG3btorz9MCBA74p\n2+DBg4+wsmoiLy+PM844o8Z3b775JoMGDapS1qFDBzIzM+nYsSOZmZlVpjKVufbaa30O6HvuuYcu\nXcziYJcuXbjkkksQEU4++WSioqLIysoiKSmJ4uJi4uPj65TZ0gCi62+x+JsKpTkBbj5U9W/Af4Ae\nQY90DPP999+zefPh46xXrVpF9+51W06pqam8+OKLbN++ne3bt7Nt2zY+++wzCgsLAxp34MCBPkdn\nRX/z58/39Zeens7s2bMB4zN56623KC0tBcyqU4U/54orruDrr7/m448/9vW1ePFi1q1bV2W8Coul\npqu6UgG44IILmDVrFgCzZs3iwgsvPKIOwL59+wDYuXMn7733HldccQVgVrcWLlwImGlRaWmpTxlu\n2rTpiGmgJcRE19/H4i/y9he1lL+IcbZaHPLz87n55ps5ePAg0dHR9OnTh+eff95vm8LCQubPn8+z\nz/oMOJo1a8bpp5/ORx8Flu5mwIABHDp0iLy8PLKzs9mxY0eVZeaePXvSsmVLvvnmG84//3zS09MZ\nOXIkLpeL3r17+8aOj49n7ty53Hbbbdx222243W6GDRvGE088UY9f4zDTpk1j8uTJvPTSS3Tv3p23\n334bgD179nDdddcxb54xZi+99FKys7Nxu908/fTTtGrVCoBrrrmGa665hiFDhhATE8OsWbN806mF\nCxdy3nnnNUg+Sx00QLGIqvqvIOKKpHSUo0aN0spxIcc7//rXv0hMTOS6646JGWVAlJSUMGbMGL78\n8kuio4NMFGIJnMXr4OcPU3TKQBLO+CXc2SM9UB+o3zgWEUkEPgiJkJawcP311/uWY48Xdu7cyfTp\n061SCTcNWG72F8fSEZOe8sH6ymUJP3Fxcfzyl79sbDGOKn379qVv376NLcaxjzMVIsTLzUuAO1X1\nQz91LBbLsUoDfCz+pkI5+N/bY7FYjmVi3AC49h8iOsiwfn+KJQWY4BwBYrFYjjf6dYKeHYj+8QA3\nrlwWVNNaFYuqFgAXAMMbKJ7FYmmKuKPhDxcDkLIrsA2tFfhdFVLVcic61lIHLpeL5ORkhgwZws9/\n/vM6g9weeugh331NqRRqY8aMGbz66qu+Z4/HQ1JSEtOmVTlIgR49epCVleV7XrRokS+iF+CTTz5h\n1KhRDBo0iOHDh3P77bcHNL4/0tPTGTp0KH369OGWW26hplCGsrIyrrrqKoYOHcrAgQN5+OGHfe9S\nU1MZOnQow4YNY/z48T75n3rqKV5++eUGy2epB0ktAYgPNqxfVYO6MMroymDbheoaOXKkRiLNmjXz\n3V9xxRX62GOPBVx/27ZtOnjw4DrHKCsr06FDh2pZWZmvbN68eXraaadpr1691Ov1+sq7d++u+/fv\n9z0vXLhQzzvvPFVVXbt2rfbq1Uu/++47VVX1eDz6zDPP1Dl+XZx00km6dOlS9Xq9On78eJ03b94R\ndd544w2dMmWKqqoWFBRo9+7dddu2bVpWVqZJSUk+me+88069//77ffWSk5MbLJ+lHny1QbX9FfrF\niHsUSNMA/079LTe3AG7EOHA/BD4DbgJuB1YTqYm0O4Tp4IC9gX/dM844gzVr1gAmLH3Xrl0UFxdz\n6623MnXqVKZNm0ZRURHJyckMHjyYBx98kPLycn7zm9/w9ddf07lzZz744IMj9sIsWLCAESNGVInf\nSE1N5dZbb2XmzJksXbqU0047rU75Hn30Ue69914GDBgAGGvr+uuvD/j71URmZia5ubm+yN9f/epX\nvP/++0yYMKFKPRGhoKAAj8dDUVERMTExtGjRwvc/ZEFBAW3btiU3N9e3mzshIYEePXqwfPlyTj75\n5AbJaQmSOLNlMN5z5K57f/ibCr0G9AfWYjYKLgR+BlykqjVv+rDg8Xj45JNPfBsLX375ZdLT00lL\nS+PJJ58kOzub6dOnEx8fz6pVq3jjDaOwNm/ezI033sj69etp1aoV77575JFOX331VZX0BsXFxXz+\n+edMmjQpLGkSFi5cWGOKhJqU1+7du32bB8FsINy9+4j0O/zsZz+jWbNmdOzYkW7dunHHHXfQpk0b\n3G43M2fOZOjQoXTq1IkNGzZUyYxn0yQ0EnFmZShYxeIvjqWXqg4FEJEXgUygm6oW+2nT+ARhWYSS\nCgsEjMVS8Ufx5JNPMmfOHAB27drF5s2badu27RHte/bs6Ws/cuRItm/ffkSdzMxMBg4c6HueO3cu\nY8eOJT4+nksvvZQHHniAGTNm4HK5QpImYezYsfVOk1Aby5cvx+VysWfPHnJycjjjjDM455xz6Nq1\nKzNnzmTlypX06tWLm2++mYcffpg//elPgEnytHHjxpDKYgmAeMdiCfIIEH+KxaeiVLVcRDIiXqk0\nIhUWSGUWLVrE559/ztKlS0lISCAlJaVKmoPKVA7Ld7lcFBUV1ThG5fapqal8+eWX9OjRA4Ds7GwW\nLFjAuHHjfGkSKnYDV0+TkJ6eXiXzXE0sXLiQ3//+90eUJyQk8PXXX1cp69y5MxkZGb7njIwMOnc+\nMgzqzTffZPz48bjdbtq3b89PfvIT0tLSyM42yQN79+4NwOTJk6uksrRpEhqJMEyFThSRXOfKA4ZV\n3ItIbv0lPX44dOgQrVu3JiEhgY0bN7Js2eFYALfbXWO2OH9UTpOQm5vLkiVL2Llzpy9NwtNPP+2b\nDqWkpPDaa68BUF5ezuuvv+5Lk3DnnXfy0EMPsWmTSdrn9Xqr7LKuoMJiqX5VVyoAHTt2pEWLFixb\ntgxV5dVXX60xTUK3bt1YsGABAAUFBSxbtowBAwbQuXNnNmzY4Muz+9lnn1WxzmyahEYi1IpFVV2q\n2sK5ElU1utJ9i4ZJe3wwfvx4PB4PAwcOZNq0aVVSGkydOpVhw4Zx5ZWBO5snTJjA4sWLAZgzZw5n\nnXVWFUvnwgsv5KOPPqKkpIQ///nPbNmyhRNPPJHhw4fTp08ffvELkwlj2LBhzJgxg8svv5yBAwcy\nZMiQgBNv++OZZ57huuuuo0+fPvTu3dvnuP3www+57777ALjxxhvJz89n8ODBnHTSSVx99dUMGzaM\nTp06cf/993PmmWcybNgwVq1aVSVf71dffcW4ceMaLKMlSHyKJbipUJ1pEyKN4z1twsUXX8yjjz56\nXG3CW7lyJY8//rjPArMcRVThBPMPkux7MzRpEyyRx/Tp08nMzGxsMY4qWVlZPPDAA40txvGJCBrr\nDrqZTWjRxOjfvz/9+/dvbDGOKnYK1LhoXAxSEjrnrcVisfiWnIPBKhaLxeIXTUwIuo1VLBaLxS/a\nqlnQbcKqWERkvIh8LyJbRGRaDe9FRJ503q8RkRHhlMdisdSDlhFksThnPz8NTAAGAZeLSPXDZyYA\nfZ1rKjAzXPJYLJb6oS0jy2I5GdiiqltVtRSYDVQPxbwQeNXZoL0MaOUk8bZYLJFChCmWzsCuSs8Z\nHJlDN5A6FoulMamHj6VJxLGIyFTMVAkgX0S+91O9HZDl530kYWUND1bW8BBwAFU4FctuoGul5y5O\nWbB1UNXnAf9nljqISFqgYceNjZU1PFhZw4OIBLyXJpxToRVAXxHpKSIxwGWYTHSV+RD4lbM6dApw\nSFWPr3h1i+UYJGwWi6p6ROQm4FPABbysqutF5HfO+2eBecBEYAtQCFwdLnksFsvRI6w+FlWdh1Ee\nlcuerXSvmLy6oSSgKVOEYGUND1bW8BCwrE0ubYLFYol8bEi/xWIJOU1OsYjIdhFZKyKrKrzUItJG\nRD4Tkc3OZ+tK9f/obBn4XkR+2gjyukRkpYjMjVRZRSRORJaLyGoRWS8if41gWbuKyEIR2eDIemuk\nyuqM/bKI7BORdZXKIlJWf9S1PecIAj2AKFIuYDvQrlrZo8A0534a8IhzPwhzBlIs0BP4AXAdZXn/\nALwJzI1UWQEBmjv3buAb4JQIlbUjMMK5TwQ2OfJEnKzO+GcCI4B1TeH/11q+g8uRpRcQ48g4yF+b\nJmex1MKFwCznfhZwUaXy2apaoqrbMKtPR+3EKxHpApwHvBjJsqoh33l0O5dGqKyZqvqtc58HfIeJ\n1o44WR0ZFwMHqhVHpKx+CGR7ThWaomJR4HMRSXcicgE66OH4lx+BDs59Y28ZmAHcBXgrlUWkrM6U\nbRWwD/hMVb+JVFkrEJEewHCMhRXRslajKckK9ZCrSYT0V+N0Vd0tIu2Bz0SkyilWqqoi0uhLXSJy\nPrBPVdNFJKWmOpEiK5izo4BkEWkFzBGRIdXeR4ysACLSHHgXuE1VcysfxhZpsvqjKckaDE3OYlHV\n3c7nPmAOxkzbW7Er2vnc51QPaMtAmPgJcIGIbMeYjmeJyOsRKqsPVT2IOU53PBEqq4i4MUrlDVV9\nzymOSFlroSnJCvWQq0kpFhFpJiKJFffAucA6zNaAq5xqVwEfOPcfApeJSKyI9MTkfVl+NGRV1T+q\nahdV7YHZzrBAVX8RibKKSJJjqSAi8cA4YGOEyirAS8B3qvp4pVcRJ6sfmpKsENj2nKo0tsc5SO90\nL4xHejWwHrjXKW8L/A/YDHwOtKnU5l6MR/t7YEIjyZ3C4VWhiJMVGAasBNZgFPV9ESzr6Rg/2xpg\nlXNNjERZnbFTMeeel2F8E9dGqqx1fI+JmBW4Hyr+7vxdNvLWYrGEnCY1FbJYLE0Dq1gsFkvIsYrF\nYrGEHKtYLBZLyLGKxWKxhJymGHlrOYqIyMPAf4GWwEBVfbiRRbI0AazFYqmL0cAyYAywuL6diIj9\nR+w4wioWS42IyD9EZA1wErAUuA6YKSL31VC3t4gsc/Lk/F1E8p3yFBFZIiIfAhucsj+IyDrnus0p\n61EtX8kdIvIX536RiDwhJv/OOhE52Skf45StEpPvJjG8v4glGOy/IpYaUdU7ReRt4FeYnDKLVPUn\ntVR/AnhCVVPFSZZeiRHAEFXdJiIjMQnTR2NywHwjIl8AOXWIk6CqySJyJvAyMAS4A7hRVb9yNiQW\n1+d7WsKDtVgs/hiB2T4xAJP3pDZOBf7PuX+z2rvlanKLgAnHn6OqBWryv7wHnBGAHKngy23SwtnX\n9BXwuIjcArRSVU8gX8hydLAWi+UIRCQZeAWzizULSDDFsgo4VVWLguiuIIA6Hqr+IxdX7X31fSeq\nqtNF5GPMHpavROSnqroRS0RgLRbLEajqKlVN5nDaxwXAT1U1uRalsgy41Lm/zE/XS4CLRCTB2Z1+\nsVO2F2gvIm1FJBY4v1q7KQAicjrmULtDItJbVdeq6iOY3bcD6vdtLeHAWiyWGhGRJCBHVb0iMkBV\nN/ipfhvwuojcC8wHDtVUSVW/FZFXOJwK4EVVXemM9zenfDcmZUNlikVkJSZl5jUVY4rIWEx2vvXA\nJ8F+R0v4sLubLQ1GRBKAIlVVEbkMuFxV/eZEDaLvRcAdqhrwucGWxsdaLJZQMBJ4yknCdJDDVoXl\nOMVaLBaLJeRY563FYgk5VrFYLJaQYxWLxWIJOVaxWCyWkGMVi8ViCTlWsVgslpDz/w/kbjJzJ0hO\nAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a15e06978>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "module_expression = np.loadtxt(\"data/module_expression.txt\")\n",
    "cf = lambda x: -1000 if x == b'NA' else x\n",
    "neuropath = np.loadtxt(\"data/neuropath.txt\", converters={i:cf for i in range(8)})\n",
    "\n",
    "target = neuropath[:,1]\n",
    "dtrain = xgb.DMatrix(module_expression, label=target)\n",
    "dtrain = xgb.DMatrix(module_expression, label=target)\n",
    "param = { \"max_depth\": 6, \"base_score\": np.mean(target), \"eta\": 0.01}\n",
    "bst = xgb.train(param, dtrain, 300)\n",
    "out = bst.predict(xgb.DMatrix(module_expression), pred_contribs=True)\n",
    "out_path = bst.predict(xgb.DMatrix(module_expression), pred_contribs=True, approx_contribs=True)\n",
    "out_pred = bst.predict(xgb.DMatrix(module_expression))\n",
    "\n",
    "pl.close()\n",
    "pl.rcParams[\"figure.figsize\"] = (4,3)\n",
    "plot_m(out, out_pred, \"SHAP\", color=\"#008BE0\")\n",
    "plot_m(out_path, out_pred, \"Path\", color=\"#ff165a\")\n",
    "#plot_m(module_expression, target, \"Unsupervised\", color=\"#18C45D\")\n",
    "pl.legend(loc=\"lower left\", frameon=False, prop={'size':10})\n",
    "pl.ylabel(\"R^2 (% variance explained)\")\n",
    "pl.xlabel(\"# groups\")\n",
    "pl.ylim(0,1)\n",
    "pl.xlim(0,len(target))\n",
    "pl.gca().invert_xaxis()\n",
    "#pl.figsize(5,4)\n",
    "#pl.figure(num=0, figsize=(4, 3))\n",
    "#pl.savefig(\"alz2.pdf\")\n",
    "pl.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# load the data\n",
    "raw_train_data = np.genfromtxt(\"data/adult.data\", delimiter=\",\", dtype=None, autostrip=True, deletechars=[\"'\"])\n",
    "raw_test_data = np.genfromtxt(\"data/adult.test\", delimiter=\",\", dtype=None, autostrip=True, deletechars=[\"'\"], skip_header=1)\n",
    "\n",
    "# extract the category options in the training data\n",
    "col_names = [\n",
    "    \"age\", \"workclass\", \"fnlwgt\", \"education\", \"education-num\",\n",
    "    \"marital-status\", \"occupation\", \"relationship\", \"race\", \"sex\", \"capital-gain\",\n",
    "    \"capital-loss\", \"hours-per-week\", \"native-country\"\n",
    "]\n",
    "work_classes = list(set([v[col_names.index(\"workclass\")] for v in raw_train_data]))\n",
    "education_types = list(set([v[col_names.index(\"education\")] for v in raw_train_data]))\n",
    "marriage_statuses = list(set([v[col_names.index(\"marital-status\")] for v in raw_train_data]))\n",
    "occupations = list(set([v[col_names.index(\"occupation\")] for v in raw_train_data]))\n",
    "relationships = list(set([v[col_names.index(\"relationship\")] for v in raw_train_data]))\n",
    "races = list(set([v[col_names.index(\"race\")] for v in raw_train_data]))\n",
    "sexes = list(set([v[col_names.index(\"sex\")] for v in raw_train_data]))\n",
    "countries = list(set([v[col_names.index(\"native-country\")] for v in raw_train_data]))\n",
    "types = [work_classes, education_types, marriage_statuses, occupations, relationships, races, sexes, countries]\n",
    "N = raw_train_data.shape[0]\n",
    "P = sum(map(len, types)) + 5\n",
    "\n",
    "def build_matrix(data, P):\n",
    "    N = data.shape[0]\n",
    "    X = np.zeros((N, P))\n",
    "    \n",
    "    group_names = []\n",
    "    feature_groups = []\n",
    "    \n",
    "    \n",
    "    def assign_class(i, offset, name, classes, data_col):\n",
    "        if i == 0:\n",
    "            group_names.append(name)\n",
    "            feature_groups.append(list(range(offset, offset+len(classes))))\n",
    "        j = classes.index(data[i][data_col])\n",
    "        X[i,offset+j] = 1\n",
    "        offset += len(classes)\n",
    "        return offset\n",
    "    \n",
    "    def assign_num(i, offset, name, data_col):\n",
    "        if i == 0:\n",
    "            group_names.append(name)\n",
    "            feature_groups.append([offset])\n",
    "        X[i,offset] = data[i][data_col]\n",
    "        offset += 1\n",
    "        return offset\n",
    "    \n",
    "    for i in range(N):\n",
    "        offset = 0\n",
    "        offset = assign_num(i, offset, \"Age\", 0)\n",
    "        offset = assign_class(i, offset, \"Work class\", work_classes, 1)\n",
    "        offset = assign_class(i, offset, \"Education\", education_types, 3)\n",
    "        offset = assign_num(i, offset, \"Years in school\", 4)\n",
    "        offset = assign_class(i, offset, \"Marital status\", marriage_statuses, 5)\n",
    "        offset = assign_class(i, offset, \"Occupation\", occupations, 6)\n",
    "        offset = assign_class(i, offset, \"Relationship\", relationships, 7)\n",
    "        offset = assign_class(i, offset, \"Race\", races, 8)\n",
    "        offset = assign_class(i, offset, \"Sex\", sexes, 9)\n",
    "        offset = assign_num(i, offset, \"Capital gain\", 10)\n",
    "        offset = assign_num(i, offset, \"Capital loss\", 11)\n",
    "        offset = assign_num(i, offset, \"Weekly working hours\", 12)\n",
    "        offset = assign_class(i, offset, \"Native country\", countries, 13)\n",
    "        \n",
    "    y = np.array(list(v[-1] == b'>50K' for v in data))\n",
    "    \n",
    "    return X,y,group_names,feature_groups\n",
    "\n",
    "def group_values(x):\n",
    "    out = []\n",
    "    offset = 0\n",
    "    \n",
    "    def add_class(offset, class_members):\n",
    "        pos = -1\n",
    "        try:\n",
    "            pos = list(x[offset:offset+len(class_members)]).index(1)\n",
    "        except:\n",
    "            pass\n",
    "        out.append(\"\" if pos == -1 else class_members[pos])\n",
    "        offset += len(class_members)\n",
    "        return offset\n",
    "    \n",
    "    out.append(x[0])\n",
    "    offset += 1\n",
    "    offset = add_class(offset, work_classes)\n",
    "    offset = add_class(offset, education_types)\n",
    "    out.append(x[offset])\n",
    "    offset += 1\n",
    "    offset = add_class(offset, marriage_statuses)\n",
    "    offset = add_class(offset, occupations)\n",
    "    offset = add_class(offset, relationships)\n",
    "    offset = add_class(offset, races)\n",
    "    offset = add_class(offset, sexes)\n",
    "    out.append(x[offset])\n",
    "    offset += 1\n",
    "    out.append(x[offset])\n",
    "    offset += 1\n",
    "    out.append(x[offset])\n",
    "    offset += 1\n",
    "    offset = add_class(offset, countries)\n",
    "    \n",
    "    return out\n",
    "\n",
    "# build the training data\n",
    "train_data,train_labels,group_names,feature_groups = build_matrix(raw_train_data, P)\n",
    "data_median = shap.DenseData(np.reshape(np.median(train_data,0), (1,train_data.shape[1])), group_names, feature_groups)\n",
    "\n",
    "# and test data\n",
    "test_data,test_labels,group_names,feature_groups = build_matrix(raw_test_data, P)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "inds = list(range(train_data.shape[0]))\n",
    "random.shuffle(inds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "module_expression = train_data#np.loadtxt(\"data/module_expression.txt\")\n",
    "#cognitive_score = np.loadtxt(\"data/cognitive_score.txt\")\n",
    "#cf = lambda x: -1000 if x == b'NA' else x\n",
    "#neuropath = np.loadtxt(\"data/neuropath.txt\", converters={i:cf for i in range(8)})\n",
    "\n",
    "cut_ind = 31000\n",
    "\n",
    "target = train_labels#neuropath[:,label_ind]\n",
    "module_expression_train = module_expression[inds[:cut_ind],:]\n",
    "target_train = target[inds[:cut_ind]]\n",
    "module_expression_test = module_expression[inds[cut_ind:],:]\n",
    "target_test = target[inds[cut_ind:]]\n",
    "\n",
    "dtrain = xgb.DMatrix(module_expression_train, label=target_train)\n",
    "dtest = xgb.DMatrix(module_expression_test, label=target_test)\n",
    "param = { \"max_depth\": 6, \"base_score\": np.mean(target_train), \"eta\": 0.1, \"colsample_bytree\": 0.1}\n",
    "param = { \"max_depth\": 6, \"base_score\": np.mean(target_train), \"eta\": 0.1, \"subsample\": 0.5}\n",
    "bst = xgb.train(param, dtrain, 200)\n",
    "out = bst.predict(xgb.DMatrix(module_expression_test), pred_contribs=True)\n",
    "out_path = bst.predict(xgb.DMatrix(module_expression_test), pred_contribs=True, approx_contribs=True)\n",
    "pred = bst.predict(xgb.DMatrix(module_expression_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/slund1/anaconda3/lib/python3.5/site-packages/scipy/cluster/hierarchy.py:298: ClusterWarning: scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix\n",
      "  return linkage(y, method='complete', metric='euclidean')\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAARYAAADUCAYAAABZGgxEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VOXVwPHfySSQhbAoiwjIvm8RIiivFtAXCygutVVx\n30q12kJbF1pb9a1V0baKFhU33KograioqMUCCipKosgq+25kDUnIPjPn/ePeDJN9JpkhCTnfz2eY\ne5+7nZkkD8+9zyaqijHGRFJMXQdgjDn+WMZijIk4y1iMMRFnGYsxJuIsYzHGRJxlLMaYiItaxiIi\ns0Rkn4isqWS7iMgTIrJZRFaJyJBoxWKMObaiWWJ5CRhbxfZxQE/3NQl4OoqxGGOOoahlLKr6KXCo\nil0uBF5Rx3KgpYi0j1Y8xphjpy6fsXQAdgWt73bTjDENXGxdBxAKEZmEc7vEidJ0aBdPUh1HVH8o\ngDj/lHTOUAkkutul0n0RUHffkjcQ9xxuogRfK7ABRALXUnG3SdAygop7fPA2xF2nzLuzLDHOsogg\nMTjvIqXTBYiRo9uOhhk4nYms9PT0A6raJpR96zJj2QN0Clrv6KaVo6rPAs8CDI5vq+93+AkS1MdJ\nju5Yeh0C+1Wc5p4jqLtUSZqEmRb0pxpSWnXnj+E468OlEK2P5Eco8ngo8ngo9HgoiI2j0OOh0BNL\nYWwsOfHxZCUkkBOfQG5iArmJiRQkJVDULJHCZokUNUvAm5yIv3kScS0Tad6iKfFxHuJjhaYeISlO\naN/MQ3KTGJJinfXEuBj3XYhpJLmYiOwIdd+6zFjmA7eJyBxgOJClqhnVHRQ34BTapc2MenDHSkkn\n0JK/Ob86+WORgl8VnzppPp8ff8my3+++g9/nx+f3oz7F7/Xj9/nx+xS/z4/6/Pjcd/Urfq8Pv1/x\n+xT1+dx391i/H3x+/H43zetHtWS7c3zwsnNxPxT7kGIveH1IsQ/xeonx+hD3FVPsDVr2EePzEeMt\neTn7xvjcbd6j2z1eb9CyD4/PR6zPeY9zl2P9fpp4fcSgxPu8xPu8Efu5FHo87EtsxpG4Jhxp0oQj\ncU3Y0iyZzKYJZMYncNh9z4yPJz8xkYJmCRQ3S6Q4OQlfcgJJTZyMJykuhjYJMXRIjqVTsodmcTEk\nNxGSm8TQKt5ZTogVEmJjaOpxSmfHg6hlLCIyGxgFtBaR3cC9QByAqs4EFgDjgc1AHnB9tGKpz0p+\nkUp+nWLchbhSqWBNjqrg9UGxD4qKocjrvorx5RfhzS/Gl5WHL/MImpWH73Au/sO5+A/nQXYukpWL\nJzsfT04esTm5xOXmE1dYTFOfj045WTUK50BCIvsSkyjwxJEXF8e2Fq3Yn5DEuqRmZDdpyo7mLfmk\nU1dymzQtdZwACbFO6ahDMw+t4mM4MT6GjsmxtEmIoV2Sh5ObeejVKo7WCTH1OhOShjZsQmpqqqal\npdV1GOZ4pgp5hXAox3nPK4TsPNifBYdzneXDuZCVh2YdwZ+Zi2blQVYucjgXT15BSJdZ1/5kdjZv\nye5mzdncvBX/6dSN705oQ35ck2qPTYoTOiV76JgcywnxMTRvEkObxBgGtG7CiJOb0rm5J+IZj4ik\nq2pqSPtaxmJMBKnC94cgJx8Ki+HwEWf9UA4czIZ9WfDNFtj6g3NfWwFfUjw/nDucDePPZEuHk9mV\nD/vyfezP87Mz28vGzGKyi6r+u+3c3MPIjvGc0zmeC3sk0qJp7Uu8lrEYU99l58HGPfBDppPxfLMF\nvtoA+7OdDKlE0zjo2wlOaQNd2kFKN+jfmcxWzdldFMPuHB9ZRX6yCv18f8TH13uL+Pz7Qg4V+Etd\nLqVtHD1bxjGqUzz/2zmeXifEhR2yZSzGNFSq8M1WeHEhpG+GLVXUZ7RpAQ9cA2f1hxOSA8l+Vd7Z\nnM87m/NYtqeQrYe95Srk/qdDU24Y0IyLeybQKt4TUmiWsRhzvMg8Apv2wPZ9sH4XrNvpvO89fHSf\nWA90bA2dWjuZzJhToVcHJx3ILPCx4ZCX5RmFfLgtn493FOBz/+xjBHq1iuX3w1twTf9mVYZiGYsx\nx7v1u+DRt2DXAfh2a/nnNUnxMHog3H4J9OlYqsXg/jwfT6/M4eOdBSzbXRgozZyU5GHZxHZ0b1nx\nbZJlLMY0JnmFsOcgrN4O//kaPkiDgqDnNAO7wPDecPmPnOUguUV+xs3bx9LdhQD8YXhzHjirVYWX\nsYzFmMZMFf65GBascJ7TZOUd3ZbxKsSUryH6zeJDTE/P4ReDmzFzzIkVnjacjMVaXRlzvBGBq8+G\n2XfB10/AC5OPbrvzRcgt384mtZ3TduaZb49wMN9X6xCqzVhEpK2IXCwit4rIDSIyTEQsQzKmIWiW\nAOcPgx8NcNZfXQTdb4JLH4KDOYHdTj/5aCvgc+bupbZ3MpVmECIyWkQ+At7HGZSpPdAP+COwWkT+\nT0Sa1+rqxphj47lfw68vgMFdnVulT9bAgFtg/pcAdG8Zx0tjnVugb/cXs+FQ7fpdVfqMRUT+CvxD\nVXdWsC0WOB/wqOqbtYogTPaMxZhaWrsTbpwO2/ZC25aw+snApn6zvmf9oWKe/t8TuDkludRhEXnG\noqp3VJSpuNu8qvr2sc5UjDER0P8UWPZXiPPAvsNw29OB1r4nJTlZgq+WdTqV9m4Wkd9WdaCqPlq7\nSxtj6kysB34xDma8B/9aBqu2wdzfM7htExbvKqSoljlLVQ9hk91XKnALzrCRHYCbARtR35iG7k8T\nndojgA174Lx7aeKO21FYy4yl0hKLqv4fgIh8CgxR1Rx3/T6cB7rGmIbukeuh58lwzz9hzyHi3WEM\no1liKdEOKApaL3LTjDENXUyMc0uU0BRUSfQ5z1qWZxTW7rQh7PMK8JWI3OeWVr4EXq7VVY0x9UuS\n046l38p1AHywrYDH0rJrfLpqMxZVfQBn2MhM93W9qj5Y4ysaY+qflG4AjP12ZSDpt0syWbO/qLIj\nqhRqC9pEIFtVHwd2i0jXGl3NGFM//WoCAHEfpLHr4qPT62zLrllDuVCa9N8L3AX83k2KA/5Zo6sZ\nY+qnAZ2htdOQvuOIyTyVuRaAtB+iV2K5GLgAyAVQ1e9xqqGNMceLZgkw725o6ZRWfvHC67TKz+PZ\nVUdYUYMHuaFkLEXqtPt3J8ITm4bQmONR746wYjoAMar0ObSfH3J9nP/WvrBPFUrGMldEnsGZtP3n\nwMfAc2FfyRhT/zVPhAuGA/BiH+f5yuFCf1VHVCiUWqG/Af8G3gR6A/eo6j/CvpIxpmHo3BaA3ve8\nQOu8I5XNUlKlkGqFVHWh2ynxdlVdGOrJRWSsiGwQkc0iMrWC7S1E5F0R+VZE1opIo5wN0Zh65bzT\nAovXrfk6OhmLiPxERDaJSJaIZItIjohU23JGRDzAkzhjufQDJopIvzK73QqsU9XBONOx/l1Eqp8G\nzhgTPad2h5+MAKBLVmbUSiyPABeoagtVba6qyaoaygBPw4DNqrpVVYuAOcCFZfZRIFmcuSCbAYeA\nyM3sbYypmQnDAJj43SqAsEeUCyVj2auq68ONC6cn9K6g9d1uWrAZQF/ge2A1MFlVyz0pEpFJIpIm\nImn79++vQSjGmLB0aw/ACQX5nHQkO+xSSygZS5qIvCEiE93bop+IyE/Cj7RCPwZWAicDKcCMioa7\nVNVnVTVVVVPbtGkToUsbYyrVp2Ng8Zq130QlY2kO5AHnAhPc1/khHLcH6BS03tFNC3Y9ME8dm4Ft\nQJ8Qzm2MibbbnfLDj7dvKjdFa3UqHY+lhKrWtKZmBdDT7Ve0B7gcuKLMPjuBc4ClItIOpzp7aw2v\nZ4yJpAnD4W/z6Hb4UNgllqqGprxTVR8RkX9A+QxLVX9d1YlV1SsitwEfAR5glqquFZGb3e0zgfuB\nl0RkNSDAXap6ILyPYIyJipOcGRFb5+fh94Y311BVJZaSB7Y1HhJfVRcAC8qkzQxa/h7nFssYU9+0\nSGRby1Z0PZxJ/ierwzq0qqEp33XfbVAnYxojERZ2782k9OXI1h/COrTaZywi0gZn2IR+QHxJuqqe\nHW6cxpiGJTve+ZOXjWXrXaoWSq3Qazi3RV2B/wO24zyYNcYc5wrj4gCI+W5XNXuWFkrGcqKqvgAU\nq+onqnoDYKUVYxqBFR07AyD54Q34VO2tEFDsvmeIyHk4rWRPCOsqxpgG6UAzd0y3CNYKlfiLiLQA\nfgf8A6fB3G/CC88Y0xBJyT2NP7wxWUJpIPeeu5gFjA4vLGNMg1aSs4TZQq6qBnIVNowrUV0DOWPM\nccCdcjXcmRGrKrHUuGGcMeb4kNzUKbHszQlvNJOqGsiVahjn9jrWkjmcjTHHvz+f2Qr+BhrmM5ZQ\nRpBLdfvyrALWuMNIDq1hnMaYBiT1ZLeBXJjHhVIrNAv4paouBRCRM4EXgUFhXssY09CIk6XERGEE\nOV9JpgKgqsuw4SONaRxiapaxhFJi+cSdV2g2Ti3RZcASERkCoKpfh3VFY0zD4WYsEoWMZbD7fm+Z\n9FNxMhpr3m/M8aqGt0KhNJCzRnHGNFY1vBUKpVboVbdJf8l6ZxH5b7jxGWMaILflrYQ56m0oD2+X\nAV+KyHh37uaFwPRw4zPGNEBuPXM0boWeEZG1wGLgAHCqqoY3nJQxpmGKccoe0bgVuhqnLcs1wEvA\nAhEZXOVBxpjjQ0mtUJiHhVIrdAlwpqruA2aLyFvAyzgTjBljjmdRrBW6qMz6VyIyLKyrGGMaphq2\nYwnlVqiXiPxXRNa464OAO2sQojGmoYlWdTPwHPB73CEqVXUVzqyG1RKRsSKyQUQ2i8jUSvYZJSIr\nRWStiHwSauDGmGOg5FYozOrmUJ6xJLq3P8Fp1fYVEhEP8CQwBtgNrBCR+aq6LmiflsBTwFhV3Ski\nbcOK3hgTXe7ffVJxcTU7lhZKieWAiHTHHU1ORH4KZIRw3DBgs6puVdUiYA5wYZl9rsCZFH4ngPuA\n2BhTXzSNI7dJk7APCyVjuRV4BugjInuAKcDNIRzXAQiejGS3mxasF9BKRJaISLqIXBPCeY0xx4on\nhlWdTgn7sFBqhbYC/ysiSUBMhEeQiwWGAucACcAXIrJcVTcG7yQik4BJAKecEv6HNMbU3Ge9enPG\nls1hHRNKiQUAVc0NM1PZA3QKWu/opgXbDXzknvsA8ClHe1MHX/tZVU1V1dQ2bdqEEYIxprY03NZx\nhJGx1MAKoKeIdBWRJjg1SfPL7PMOcKaIxIpIIjAcZzpXY0w9UYN8JaRaoRpRVa+I3AZ8BHiAWaq6\nVkRudrfPVNX1IvIhzni6fuB5VV0TrZiMMeFTCT9rqTZjcUsSvwNOUdWfi0hPoHfQRGaVB6S6AFhQ\nJm1mmfW/An8NK2pjTL0Wyq3Qi0AhcIa7vgf4S9QiMsbUKzUosISUsXRX1Uc42vI2j5rddhljGiCt\nwZ97KBlLkYgkcLSBXHecEowxphGI1sPbe4EPgU4i8hrwP8B1NbiWMaYB0pgoPLxV1YUi8jVwOk7m\nNdltc2KMaQRqUmIJZdiEiwGvqr7v1gR5ReSi6o4zxhwnavD0NpRnLPeqalbJiqoepvwcQ8aY41R4\nAyY4QslYKtonag3rjDH1S1RuhYA0EXlURLq7r0eB9BpcyxjTANWk5W0oGcuvgCLgDfdViDOUgjGm\nEYhKdbOq5gIVDitpjDn+1aR3cyh9hXoBtwNdgvdXVZsM3pjGIBqdEIF/ATOB5wFf2FcwxjRo0Wp5\n61XVp2twbmPM8SBKD2/fFZFfikh7ETmh5BV+dMaYxiKUEsu17vsdQWkKdIt8OMaY+iYqAz2patca\nRWOMOT5Eo1YIQEQGAP2A+JI0VX0l/MsZYxqaqDy8FZF7gVE4GcsCYBywDLCMxZjGIEoPb3+KM+/P\nD6p6Pc70HC3CvpIxptEIJWPJV1U/znAJzYF9lJ4vyBhzPIvSM5Y0d/L253A6Hx4Bvgj/UsaYhqgm\nY96GUiv0S3dxpjsHUHNVXRX2lYwxDVKr+PDnNaw0YxGRPqr6nYgMqWDbEFX9OuyrGWManKv7Nwv7\nmKpKLL/FmYj97xVsU6DaTogiMhZ4HGcmxOdVdVol+52Gc3t1uar+u7rzGmOOHU8kn7Go6iQRiQH+\nqKqfhXtiEfEATwJjcCZ/XyEi81V1XQX7PQz8J9xrGGOOgUhXN7u1QTNqGM4wYLOqblXVImAOcGEF\n+/0KeBOntskYU89EaybE/4rIJSJhn74DsCtofbebFiAiHYCLAes9bUw9Ff6ffmgZyy9wxmQpFJFs\nEckRkeywr1Sx6cBdbsmoUiIySUTSRCRt//79Ebq0MSYk0WjHoqrJNYkFZ/L44IZ0Hd20YKnAHDdH\nbA2MFxGvqr5dJoZngWcBUlNTazIbgTGmhqI10BMi0groSelOiJ9Wc9gKoKeIdMXJUC4HrgjeIbjn\ntIi8BLxXNlMxxtQticYUqyJyEzAZp8SxEmeq1S+oprpZVb0ichvwEU518yxVXSsiN7vbZ4YdrTGm\nQQilxDIZOA1YrqqjRaQP8GAoJ1fVBTg9ooPTKsxQVPW6UM5pjDnGovTwtkBVC5zzS1NV/Q7oHfaV\njDENU5RG6d/tdkJ8G1goIpnAjrCvZIxpNEKpFbrYXbxPRBbjjMXyYVSjMsbUH1GasOwJYI6qfq6q\nn9QgLGNMIxPKM5Z04I8iskVE/iYiqdEOyhhTj0Tj4a2qvqyq43FqhjYAD4vIpvCjM8Y0TNGpFSrR\nA+gDdAa+C/tKxphGo9qMRUQecUsofwZWA6mqOiHqkRlj6ocojXm7BThDVQ+Ef3pjTGMUSnXzM8ci\nEGNMPRWllrfGmMbMMhZjTH0Q0rAJACISD1wFJACvq+rBqEVljKk/ojQ0ZYnHgSIgE6ffkDHGVKjS\njEVEZotI96CkE3CGqHwTaBXtwIwx9USEezffDfxFRDKA+4G/AW/hjCJ3Xw3CM8Y0ElXNK7QVuEJE\nzgTeAN4HzlNV37EKzhhTD0SyVkhEWonIrUA/4Gc4z1Y+EhFrdWtMYxLhh7dvA4dxplN9VVVfBSYA\np4rIuzWJzxjTOFT1jOVE4N841cu/AFDVfODPItL+GMRmjKkPIvzw9l6ckeJ8wNTgDaqaEfaVjDGN\nRlUPb9/EqVo2xjRmNZhXqKqHt8+JyIBKtiWJyA0icmXYVzTGNCyn9Qr7kKpuhZ4E7hGRgcAaYD9O\nG5aeQHNgFvBa+FEaYxqUlklhH1LVrdBK4FIRaYYzx3J7IB9Yr6obQjm5iIzF6QrgAZ5X1Wlltl8J\n3IVToZUD3KKq34b9KYwx0RMTfl/lUMZjOQIsCffEIuLBKfWMAXYDK0RkvqquC9ptGzBSVTNFZBzO\nxO/Dw72WMSaKIvmMJQKGAZtVdauqFgFzgAuDd3CnFMl0V5fjzA9tjKlP6tl4LB2AXUHru920ytwI\nfFDRBhGZJCJpIpK2f//+CIYYOQ888AD9+/dn0KBBpKSk8OWXXwIwatQo0tLSAvtt376dAQNKPxOf\nMmUKHTp0wO/3B9Jeeukl2rRpQ0pKCv369eO5556r8LrffPMNN954Y6m0iy66iNNPP71U2nXXXce/\n//3vUmnNmjULLG/cuJHx48fTs2dPhgwZwqWXXsrevXvD+AbKO3ToEGPGjKFnz56MGTOGzMzMCvd7\n/PHHGTBgAP3792f69OmB9JUrV3L66aeTkpJCamoqX331FQCrV6/muuuuq1VsJjy+cDMXVY3KC/gp\nznOVkvWrgRmV7DsaWA+cWN15hw4dqvXN559/rqeffroWFBSoqur+/ft1z549qqo6cuRIXbFiRWDf\nbdu2af/+/QPrPp9PTznlFB0+fLguWrQokP7iiy/qrbfeqqqqe/fu1datW+sPP/xQ7to//elPdeXK\nlYH1zMxM7dixo/bp00e3bNkSSL/22mv1X//6V6ljk5KSVFU1Pz9fe/ToofPnzw9sW7x4sa5evTr8\nLyPIHXfcoQ899JCqqj700EN65513lttn9erV2r9/f83NzdXi4mI955xzdNOmTaqqOmbMGF2wYIGq\nqr7//vs6cuTIwHHnnHOO7tixo1bxmdAVn3SVAmka4t9/pc9Y3GckN+Hcnnyoqp8Fbfujqv6lmjxr\nD9ApaL2jm1b2OoOA54FxGoHBo+Rv0ZlWWm/vXOm2jIwMWrduTdOmTQFo3bp1yOddsmQJ/fv357LL\nLmP27NmMHj263D5t27ale/fu7Nixg3bt2gXSc3JyWLVqFYMHDw6kzZs3jwkTJtCuXTvmzJnDH/7w\nh2pjeP311znjjDOYMOFoN7BRo0aF/Bkq884777BkyRIArr32WkaNGsXDDz9cap/169czfPhwEhMT\nARg5ciTz5s3jzjvvRETIzs4GICsri5NPPjlw3IQJE5gzZw533nlnreM01fOHWWKp6lboGWAkcBB4\nQkQeDdr2kxDOvQLoKSJdRaQJcDkwP3gHETkFmAdcraobw4q8Hjn33HPZtWsXvXr14pe//CWffFJ6\nJtorr7ySlJQUUlJSGD9+fKlts2fPZuLEiVx88cW8//77FBcXlzv/1q1b2bp1Kz169CiVnpaWVu62\nquR8EydOZPbs2SHFv2bNGoYOHVrtfjk5OYHPUfa1bt26cvvv3buX9u2d3h8nnXRShbdWAwYMYOnS\npRw8eJC8vDwWLFjArl3OHfT06dO544476NSpE7fffjsPPfRQ4LjU1FSWLl0a0ucztadhZixV1QoN\nU9VBACIyA3hKROYBEwmhv6OqekXkNuAjnOrmWaq6VkRudrfPBO7B6ZP0lDiBe1W1VlO4VlWyiJZm\nzZqRnp7O0qVLWbx4MZdddhnTpk0LPAd47bXXSE11Ptb27ds5//zzASgqKmLBggU8+uijJCcnM3z4\ncD766KPA9jfeeINly5bRtGlTnnnmGU444YRS183IyKBNmzaB9b1797Jp0ybOPPNMRIS4uDjWrFnD\ngAEDkAp+MSpKq0pycjIrV64M65jga1V0vb59+3LXXXdx7rnnkpSUREpKCh6PB4Cnn36axx57jEsu\nuYS5c+dy44038vHHHwNOKe7777+vUSwmfOGWWKrKWJqULKiqF5gkIvcAi4BmlR4VRFUXAAvKpM0M\nWr4J53arwfN4PIwaNYpRo0YxcOBAXn755WofMH700UccPnyYgQMHApCXl0dCQkIgY7nsssuYMWNG\npccnJCRQUFAQWJ87dy6ZmZl07doVgOzsbGbPns0DDzzAiSeeWOrh6aFDhwK3bP379y9XyqpITk4O\nZ511VoXbXn/9dfr161cqrV27dmRkZNC+fXsyMjJo27ZthcfeeOONgQfQf/jDH+jY0akcfPnll3n8\n8ccB+NnPfsZNNx39VSkoKCAhIaHamE1kRPJWKM1t4Bagqn8GXgS6hB3ZcWzDhg1s2nR0OuuVK1fS\nuXP1JafZs2fz/PPPs337drZv3862bdtYuHAheXl5IV23b9++bN68udT5Pvzww8D50tPTmTNnDuA8\nM3njjTcoKioCnFqnkuc5V1xxBZ9//jnvv/9+4Fyffvopa9asKXW9khJLRa+ymQrABRdcwMsvvww4\nmcSFF15Ybh+Affv2AbBz507mzZvHFVdcAcDJJ58cyPAWLVpEz549A8ds3Lix3G2giZ6I3Qqp6lWV\npD+P87DVuI4cOcKvfvUrDh8+TGxsLD169ODZZ5+t8pi8vDw+/PBDZs4MFOBISkrizDPP5N13Qxvu\npk+fPmRlZZGTk8PBgwfZsWNHqWrmrl270qJFC7788kvOP/980tPTGTp0KB6Ph+7duweunZCQwHvv\nvceUKVOYMmUKcXFxDBo0KFBaqKmpU6dy6aWX8sILL9C5c2fmzp0LwPfff89NN93EggVOYfaSSy7h\n4MGDxMXF8eSTT9KyZUsAnnvuOSZPnozX6yU+Pr7Ud7p48WLOO++8WsVnQhduiUXUqe6tfAcRj9aj\n4ShTU1M1uF1IY/fYY4+RnJxc6jbheFdYWMjIkSNZtmwZsbEhz2BjauFwl5toteOF9FCfgVbZQE5E\nkoF3IhKZiYpbbrklUM3dWOzcuZNp06ZZpnJMRehWyB0l7m3ggVpGZKIoPj6eq6++uq7DOKZ69uxZ\n6nmLiT5/mP2FqsrylwJ3qOr8KvYxxjQCkawVyqTqvj3GmEYi3FqhqjKWUcA4dwoQY0wjFrESi6rm\nAhcAp9YyJmNMAxfJEguq6nNbx5pqeDweUlJSGDBgAD/72c+qbeT24IMPBpYrGkqhMtOnT+eVV14J\nrHu9Xtq0acPUqaUmUqBLly4cOHAgsL5kyZJAi16ADz74gNTUVPr168epp57K7373u5CuX5X09HQG\nDhxIjx49+PWvf01FTRmKioq4/vrrGThwIIMHDw50UizbD6l169ZMmTIFgBkzZjBr1qxax2dqLtwS\nS02GQ4gBrgz3uEi96uOwCapHhyBQVb3iiiv073//e8j7lx1KoTLFxcU6cOBALS4uDqQtWLBAR4wY\nod26dVO/3x9I79y5s+7fvz+wvnjxYj3vvPNU1RmqoFu3brp+/XpVVfV6vfrUU09Ve/3qnHbaafrF\nF1+o3+/XsWPHBoY8CDZjxgy97rrrVNUZDmLIkCHq8/nK7TdkyBD95JNPVFU1NzdXU1JSah2fqbnd\nvW+N2LAJzYFbcR7gzgcWArcBvwO+pb4OpN0uShMH7A3945511lmsWrUKcAZd2rVrFwUFBUyePJlJ\nkyYxdepU8vPzSUlJoX///jzwwAP4fD5+/vOf8/nnn9OhQwfeeeedcn1hFi1axJAhQ0q135g9ezaT\nJ0/m6aef5osvvmDEiBHVxvfII49w991306dPH8Apbd1yyy0hf76KZGRkkJ2dHWj5e8011/D2228z\nbty4UvutW7eOs88+G3A6ErZs2ZK0tDSGDRsW2Gfjxo3s27cv0C8pMTGRLl268NVXX5Xazxw7kbwV\nehXoDazG6Si4GGfwpotUteJOHwav18sHH3wQ6Fg4a9Ys0tPTSUtL44knnuDgwYNMmzaNhIQEVq5c\nyWuvORnWpk2buPXWW1m7di0tW7bkzTfLT+n02WeflRreoKCggI8//pgJEyZEZZiExYsXVzhEQkWZ\n1549ewJbQbimAAAKuElEQVSdBwE6duzInj3lht9h8ODBzJ8/H6/Xy7Zt20hPTw8Mk1Bizpw5XHbZ\nZaV6Q9swCXUrkr2bu6nqQAAReR7IAE5R1YIqjql7YZQsIqmkBAJOiaWkt+4TTzzBW2+9BcCuXbvY\ntGkTJ554Yrnju3btGjh+6NChbN++vdw+GRkZ9O3bN7D+3nvvMXr0aBISErjkkku4//77mT59Oh6P\nJyLDJIwePbrGwyRU5oYbbmD9+vWkpqbSuXNnRowYERgmocScOXN49dVXS6W1bduW7777LqKxmNBF\ncjyWwIhDquoTkd31PlOpQyUlkGBLlizh448/5osvviAxMZFRo0aVGuYgWHCzfI/HQ35+foXXCD5+\n9uzZLFu2jC5dugBw8OBBFi1axJgxYwLDJJQMjVB2mIT09PRSI89VZPHixfzmN78pl56YmMjnn39e\nKq1Dhw7s3r07sL579246dCjfDCo2NpbHHnsssD5ixAh69To6Ida3336L1+stV6KyYRLqViRvhQaL\nSLb7ygEGlSyLSHatomwksrKyaNWqFYmJiXz33XcsX748sC0uLq7C0eKqEjxMQnZ2NkuXLmXnzp2B\nYRKefPLJwO3QqFGjAv/r+3w+/vnPfwaGSbjjjjt48MEH2bjRGbTP7/eX6mVdoqTEUvZVNlMBaN++\nPc2bN2f58uWoKq+88kqFwyTk5eWRm5sLwMKFC4mNjS015ELJCHhl2TAJdSuS7Vg8qtrcfSWramzQ\ncvNaR9oIjB07Fq/XS9++fZk6dWqpIQ0mTZrEoEGDuPLK0B82jxs3jk8//RSAt956i7PPPrtUSefC\nCy/k3XffpbCwkD/96U9s3ryZwYMHc+qpp9KjRw+uusoZCWPQoEFMnz6diRMn0rdvXwYMGMDWrVtr\n/XmfeuopbrrpJnr06EH37t0DD27nz5/PPffcAzhjrwwZMoS+ffvy8MMPl7vlmTt3boUZy2effcaY\nMWNqHaOpmXBLLHVSZVybV32tbj5WLrroIt24cWNdh3FMff3113rVVVfVdRiN2sbBvwurujma8wqZ\nKJg2bRoZGRl1HcYxdeDAAe6///66DqNRi+TDW1MP9e7dm969e9d1GMeU3QLVPQ3zTshKLMaYakW0\nr5AxxgCohJdVWMZijKlWJAd6qjURGSsiG0Rks4hMrWC7iMgT7vZVIjIkmvEYY2qm3twKuXM/PwmM\nA/oBE0Wk7OQz44Ce7msS8HS04jHG1JyGOeZtNEssw4DNqrpVVYuAOUDZppgXAq+4VeXLgZbuIN7G\nmHqkPt0KdQCCu63upvwYuqHsY4ypY8dlOxYRmYRzqwRwREQ2VHNIa+BANfscS/UpnvoUC1g81alP\n8YTcgCqaGcseoFPQekc3Ldx9UNVngarnLA0iImka4oxtx0J9iqc+xQIWT3XqUzwiEvIUpNG8FVoB\n9BSRriLSBLgcZyS6YPOBa9zaodOBLFVtXO3VjTkORa3EoqpeEbkN+AjwALNUda2I3OxunwksAMYD\nm4E84PpoxWOMOXai+oxFVRfgZB7BaTODlhVnXN1IC/m26RipT/HUp1jA4qlOfYon9McRWsEUDcYY\nUxvWpN8YE3ENImMRkVkisk9E1gSl3Scie0RkpfsaH7Tt9243gQ0i8uOg9KEistrd9oSEO7p01fH8\nVUS+c7smvCUiLd30LiKSHxTnzKBjohnPCSKyUEQ2ue+tgrZF9fsJOl/voM+90h3adEpNfnaRIiLb\n3c+4sqSWoybfVYRi6SQii0VknYisFZHJbnqdfT9VxFpl95xyQh0Rqi5fwI+AIcCaoLT7gNsr2Lcf\nzrxHTYGuwBbA4277CjgdEOADYFwE4zkXiHWXHwYedpe7BO9X5jzRjOcRYKq7PDUonqh/P5XE6AF+\nADrX5GcXwTi2A63LpIX9XUUolvbAEHc5GdjoXrPOvp8qfnZbgG5AEzeGflUd0yBKLKr6KXAoxN0v\nBOaoaqGqbsOpcRrmdhVorqrL1fm2XgEuilQ8qvofVfW6q8tx2uRUKtrx4HwPL7vLLwedO+rfTyXO\nAbao6o4q9qkwtgjGUNV1Q/6uInVRVc1Q1a/d5RxgPVW3PK+r7yeU7jmlNIiMpQq/cm89ZgUVXyvr\nJtDBXS6bHg034PyPX6KrW6T9RETOCoozmvG006Ntgn4A2gVdty6+n8uB4BnVwvnZRZICH4tIujgt\nuiH87yriRKQLcCrwpZtUV99PRcK+bkPOWJ7GKZql4Eym9ve6DcchIncDXo5OQVsy0VsK8FvgdXGm\nrz1m3BJInVX/idNA8gLgX25SXf7sznR/FuOAW0XkR8Eb6+K7EpFmwJvAFFXNpp7+boejwWYsqrpX\nVX2q6gee42iRsLJuAnsofXtSYfeB2hCR64DzgSvdX1DcYutBdzkd51611zGIZ697e1Ny27XPTa+L\n72cc8LWq7oUa/ewiRlX3uO/7gLfca4f7XUWMiMThZCqvqeo8N7Y6+34qEfZ1G2zGIqWHV7gYKKkR\nmQ9cLiJNRaQrzlgvX7lF3WwROd2t7bgGeCeC8YwF7gQuUNW8oPQ24oxNg4h0c+PZGu14cL6Ha93l\na4POXRffz0SCboPC/dlFKAZEJElEkkuWcR64ryHM7yqC8QjwArBeVR8NSq+T76cKoXTPKS3aT5Qj\n9FR6Nk6RsBjn/u5GnEnrVwOr3A/ZPmj/u3FKBhsIqtkAUnF+SFuAGbgNBCMUz2ac+9CV7mumu+8l\nwFo37WtgwjGK50Tgv8Am4GPghGP1/ZSJLQk4CLQISgv7Zxeh36NuODUa37o/k7vd9LC/qwjFcybO\nbdeqoN+b8XX1/VQT63icWqstJd9bVS9reWuMibgGeytkjKm/LGMxxkScZSzGmIizjMUYE3GWsRhj\nIq5BDKZt6o6IPAT8B2gB9FXVh+o4JNMAWInFVGc4TqfKkcCnNT2JiNh/Yo2IZSymQuKML7MKOA34\nArgJeFpE7qlg3+4istwd5+QvInLETR8lIktFZD6wzk37rYiscV9T3LQuUnosmdtF5D53eYmIPO52\n4lwjIsPc9JFB45V8U9Ki1tQP9r+IqZCq3iEic3Ga9v8WWKKq/1PJ7o8Dj6vqbHEHSw8yBBigqttE\nZCjOgOnDccZ8+VJEPgEyqwknUVVT3A6Ds4ABwO3Arar6mduJr6Amn9NEh5VYTFWG4DR/74MzVkhl\nzuBoz+XXy2z7Sp2xQ8Bpwv6Wquaq6hFgHnAW1ZsNgXFnmoszOt9nwKMi8mugpR4dC8fUA1ZiMeWI\nSArwEk4v1gNAopMsK4EzVDU/jNPlhrCPl9L/ycWX2V6234mq6jQReR+nD8tnIvJjVf0ujLhMFFmJ\nxZSjqivVGbOkZKjERcCPVTWlkkxlOU5nS3B6vlZmKXCRiCS6vYsvdtP2Am1F5EQRaYoz9ESwywBE\n5EycSe2yRKS7qq5W1Ydxet/2qdmnNdFgJRZTIRFpA2Sqql9E+qjquip2nwL80x3k6kMgq6KdVPVr\nEXmJo139n1fVb9zr/dlN3wOULXkUiMg3QBzO6HwAU0RkNODH6an8AabesN7NptZEJBHIV1UVkcuB\niapa5ZioYZx7Cc7A0iHPG2zqnpVYTCQMBWa4Axcd5mipwjRSVmIxxkScPbw1xkScZSzGmIizjMUY\nE3GWsRhjIs4yFmNMxFnGYoyJuP8HVlaGFuAYrMoAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a15ca1d30>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pl.close()\n",
    "pl.rcParams[\"figure.figsize\"] = (4,3)\n",
    "plot_m(out, pred, \"SHAP\", color=\"#008BE0\")\n",
    "plot_m(out_path, pred, \"Path\", color=\"#ff165a\")\n",
    "#plot_m(module_expression_test_std, pred, \"Unsupervised\", color=\"#18C45D\")\n",
    "pl.legend(loc=\"lower left\", frameon=False, prop={'size':10})\n",
    "pl.ylabel(\"R^2 (% variance explained)\")\n",
    "pl.xlabel(\"# groups\")\n",
    "pl.ylim(0,1)\n",
    "pl.xlim(0,len(target_test))\n",
    "pl.gca().invert_xaxis()\n",
    "#pl.figsize(5,4)\n",
    "#pl.figure(num=0, figsize=(4, 3))\n",
    "#pl.savefig(\"census_data2.pdf\")\n",
    "pl.show()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
